# -*- coding: utf-8 -*-
"""
Created on Mon Aug  8 11:40:06 2016

@author: ppradeep
"""

## Import functions
import csv
import numpy

from matplotlib import pyplot as plt
from matplotlib.font_manager import FontProperties 

# Root folder of the project share. Every input CSV opened below and every
# figure saved below is addressed by appending a relative path to this prefix.
path = 'W:/Rapid Tox/'

#%%
##******* User-defined functions *********
## Function to calculate classification metrics from confusion-matrix counts
def metrics(t_p, t_n, f_p, f_n):
    """Compute summary statistics from the four confusion-matrix counts.

    Parameters
    ----------
    t_p, t_n, f_p, f_n : numbers
        Counts of true positives, true negatives, false positives and
        false negatives.

    Returns
    -------
    tuple
        ``(total, acc, sens, spec, ba, kappa)`` where ``total`` is the number
        of observations (as a float), ``acc``/``sens``/``spec``/``ba`` are
        percentages rounded to two decimals and ``kappa`` is Cohen's kappa
        rounded to two decimals.

    Raises
    ------
    ZeroDivisionError
        If there are no observations, no actual positives, no actual
        negatives, or the chance agreement equals 1.
    """
    total = float(t_p + t_n + f_p + f_n)
    acc = round(100*float(t_p + t_n)/total, 2)
    # Sensitivity is the share of actual positives (TP + FN) that were found;
    # dividing by TP + FP would give precision instead.
    sens = round(100*float(t_p)/float(t_p + f_n), 2)
    # Specificity is the share of actual negatives (TN + FP) that were found;
    # dividing by TN + FN would give the negative predictive value instead.
    spec = round(100*float(t_n)/float(t_n + f_p), 2)
    ba = round((sens + spec)/2, 2)
    # Cohen's kappa: observed agreement corrected for the agreement expected
    # by chance from the row/column marginals.
    p_o = float(t_p + t_n)/total
    p_e = ((t_p + f_n)/total)*((t_p + f_p)/total) + ((f_p + t_n)/total)*((f_n + t_n)/total)
    kappa = round((p_o - p_e)/(1 - p_e), 2)
    return total, acc, sens, spec, ba, kappa

def autolabel(rects, label, f):
    """Write ``label`` just above every bar in ``rects``.

    The text is drawn on the module-level axes ``ax``, centred on the bar and
    shifted horizontally by ``f``; it sits 0.3 data units above the bar top.
    """
    for bar in rects:
        bar_top = bar.get_height()
        bar_middle = bar.get_x() + bar.get_width()/2.
        ax.text(bar_middle + f, bar_top + 0.3, label,
                ha='center', va='bottom', fontsize = 18)

#%%
## ********** Input: CERAPP data file. Used to :
## *********** 1. Create a dictionary of chemicals with >= 4 literature sources ***********************
## **********  mark each chemical hindered or non-hindered, and save their experimental binding data *******************

cerapp_casrn = {}
all_p = [] # List of all phenols
all_hp = [] # List of all hindered phenols
p_data = {} # Dictionary of all phenols data
hp_data = {} # Dictionary of only hindered phenols data

# The context manager closes the file even if a row fails to parse, and the
# built-in next() skips the header row on both Python 2 and Python 3.
with open(path+'ReadAcross-Project/CERAPP_Phenols.csv','r') as f0:
    readCSV0 = csv.reader(f0, delimiter=',')
    header = next(readCSV0)

    for line in readCSV0:
        phenol_id = line[0]
        all_p.append(phenol_id)
        class_b = line[10] # Binder or not. 0 or 1.
        if int(line[9]) < 4:
            # Fewer than 4 literature sources: only listed in all_p.
            continue
        n_H = line[7] # Number of hindered phenolic groups in the chemical
        n_NH = line[8] # Number of non-hindered phenolic groups in the chemical
        if n_H == '0' and n_NH != '0':
            p_data[phenol_id] = ['NHP', class_b] # Not HP
        else:
            # Everything that is not purely non-hindered is treated as HP.
            p_data[phenol_id] = ['HP', class_b] # HP
            all_hp.append(phenol_id) # List of all HPs
            hp_data[phenol_id] = ['HP', class_b] # Only HPs data


#########################################################################   
## ******* Select neighbors for each of the descriptor classes ********** 
## ******* and save them in sorted by distance order ********************
#########################################################################

## ****** 1. MoSS MCSS *****
# Read distance data
dist_mat = []
cerapp_id_m = [] #List of all cerapp ids in MOSS distance file

# The context manager closes the file even on a parse error; the built-in
# next() skips the header row on both Python 2 and Python 3.
with open(path+'ReadAcross-Project/DistanceFiles/Cerapp-ReadAcross-MoSS-Ds.csv','r') as f1:
    readCSV1 = csv.reader(f1, delimiter=',')
    header = next(readCSV1)
    for line in readCSV1:
        cerapp_id_m.append(line[0])
        # Need one more extra column to compensate for the blank entry for
        # each chemical by itself.
        dist_mat.append(line[5:] + [''])

## Generate full distance matrix
# full_dist_mat is the same list object as dist_mat: the matrix is completed
# in place by setting the diagonal and copying each cell below the diagonal
# to its mirror position above it. Only cells above the diagonal are written,
# so the cells being read are never overwritten.
size = len(dist_mat)
full_dist_mat = dist_mat
for i in range(size):
    full_dist_mat[i][i] = '0.0'
    for j in range(i+1, size):
        full_dist_mat[i][j] = dist_mat[j][i]

#Create a dictionary of each hindered phenol and its neighbors, highest value first
hindered_ids = set(all_hp) # set lookup instead of scanning the list per cell
neighbors_m = {}
for idx, distances in enumerate(full_dist_mat):
    key = cerapp_id_m[idx]
    if key not in hindered_ids:
        # Only hindered phenols get a neighbor list.
        continue
    # [::-1] reverses the ascending argsort result, i.e. descending order.
    # NOTE(review): the cells are still strings here, so the order is
    # lexicographic rather than numeric - confirm this is acceptable.
    index_sorted = numpy.argsort(distances)[::-1]
    for index in index_sorted:
        n = cerapp_id_m[index]
        # Skip the phenol itself and anything without experimental data.
        if key != n and n in p_data:
            neighbors_m.setdefault(key,[]).append([n, distances[index]])


## ***** 2. Pubchem *****
hindered_ids = set(all_hp) # set lookup instead of scanning the list per cell
neighbors_p = {}

# The context manager closes the file even on a parse error; the built-in
# next() reads the header row on both Python 2 and Python 3.
with open(path+'ReadAcross-Project/DistanceFiles/Cerapp-ReadAcross-Pubchem-TDs.csv','r') as f2:
    readCSV2 = csv.reader(f2, delimiter=',')
    # From the sixth column on, the header cells are used as neighbor ids.
    header = next(readCSV2)[5:]

    for line in readCSV2:
        key = line[0] # CERAPP ID
        if key not in hindered_ids:
            # Only hindered phenols get a neighbor list.
            continue
        distances = line[5:]
        # [::-1] reverses the ascending argsort result, i.e. descending order.
        # NOTE(review): the cells are still strings here, so the order is
        # lexicographic rather than numeric - confirm this is acceptable.
        index_sorted = numpy.argsort(distances)[::-1]
        for index in index_sorted:
            n = header[index]
            # Skip the phenol itself and anything without experimental data.
            if key != n and n in p_data:
                neighbors_p.setdefault(key,[]).append([n, distances[index]])


## ***** 3. Chemotyper *****
hindered_ids = set(all_hp) # set lookup instead of scanning the list per cell
neighbors_c = {}

# The context manager closes the file even on a parse error; the built-in
# next() reads the header row on both Python 2 and Python 3.
with open(path+'ReadAcross-Project/DistanceFiles/Cerapp-ReadAcross-Chemotyper-TDs.csv','r') as f3:
    readCSV3 = csv.reader(f3, delimiter=',')
    # From the second column on, the header cells are used as neighbor ids.
    header = next(readCSV3)[1:]

    for line in readCSV3:
        key = line[0] # CERAPP ID
        if key not in hindered_ids:
            # Only hindered phenols get a neighbor list.
            continue
        distances = line[1:]
        # [::-1] reverses the ascending argsort result, i.e. descending order.
        # NOTE(review): the cells are still strings here, so the order is
        # lexicographic rather than numeric - confirm this is acceptable.
        index_sorted = numpy.argsort(distances)[::-1]
        for index in index_sorted:
            n = header[index]
            # Skip the phenol itself and anything without experimental data.
            if key != n and n in p_data:
                neighbors_c.setdefault(key,[]).append([n, distances[index]])

#%%

################################################ 
## ********** Concordance analysis *************
## ********** 1. By Tanimoto Cut-off ***********
################################################

def _percent_agreement(agree, total):
    """Return ``agree`` as a percentage of ``total``; 0.0 when ``total`` is 0."""
    if total == 0:
        return 0.0
    return 100*float(agree)/float(total)


def _analogs_above(neighbor_table, phenol, cutoff):
    """Return the neighbor ids of ``phenol`` whose stored value is >= ``cutoff``.

    ``neighbor_table`` maps a phenol id to a list of ``[neighbor_id, value]``
    pairs. A phenol without an entry has no analogs. Ids are returned in the
    stored order and repeated if they are stored more than once.
    """
    selected = []
    for neighbor_id, value in neighbor_table.get(phenol, []):
        try:
            value = float(value)
        except ValueError:
            # A blank or non-numeric cell ends the scan for this phenol. This
            # is what the previous blanket "except: pass" around the whole
            # loop did, so the counts stay the same.
            break
        if value >= cutoff:
            selected.append(neighbor_id)
    return selected


dict_HPneighbors = {}
dict_NHPneighbors = {}

coverage = {} ## Coverage: number of phenols predicted for each threshold
thresholds = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]

# One letter per descriptor: m = MoSS MCSS, c = Chemotyper, p = Pubchem.
descriptor_tables = [('m', neighbors_m), ('c', neighbors_c), ('p', neighbors_p)]
# Column order of the result rows: the three single descriptors followed by
# the overlaps (analogs selected by every descriptor named in the key).
series = ['m', 'c', 'p', 'mp', 'mc', 'cp', 'mcp']
analog_kinds = ['HP', 'NHP']
counter_keys = [(kind, name) for kind in analog_kinds for name in series]

for threshold in thresholds:
    totals = dict.fromkeys(counter_keys, 0)      # analogs compared
    agreements = dict.fromkeys(counter_keys, 0)  # analogs with the same class
    covered = dict.fromkeys(counter_keys, 0)     # phenols with >= 1 analog

    for hp in all_hp:
        hp_class = hp_data[hp][1]

        # found[kind][descriptor] -> ids of the analogs of that kind which the
        # descriptor selected for this phenol at this threshold.
        found = {'HP': {}, 'NHP': {}}
        for name, table in descriptor_tables:
            picked = _analogs_above(table, hp, threshold)
            for kind in analog_kinds:
                members = [n for n in picked if p_data[n][0] == kind]
                found[kind][name] = set(members)
                totals[(kind, name)] += len(members)
                agreements[(kind, name)] += sum(1 for n in members if p_data[n][1] == hp_class)
                if members:
                    covered[(kind, name)] += 1

        # Overlaps: each analog shared by all descriptors in the key counts once.
        for kind in analog_kinds:
            for name in series[3:]:
                shared = set.intersection(*[found[kind][d] for d in name])
                totals[(kind, name)] += len(shared)
                agreements[(kind, name)] += sum(1 for n in shared if p_data[n][1] == hp_class)

    # Each row alternates [total, percent agreement] in the order of `series`.
    # Every percentage is guarded, so a threshold that leaves a descriptor
    # without any analog yields 0 instead of a ZeroDivisionError.
    for kind, results in (('HP', dict_HPneighbors), ('NHP', dict_NHPneighbors)):
        row = []
        for name in series:
            row.append(totals[(kind, name)])
            row.append(_percent_agreement(agreements[(kind, name)], totals[(kind, name)]))
        results[threshold] = row

    # Order: HP analogs for p, c, m followed by NHP analogs for p, c, m.
    coverage[threshold] = [covered[(kind, name)] for kind in analog_kinds for name in ('p', 'c', 'm')]
    
    
#%%
# HP Plot
fig_hp = plt.figure(figsize=(12, 8), dpi = 100)
ax = fig_hp.add_subplot(111)

labels = ['MoSS MCSS', 'ToxPrints', 'Pubchem', 'MoSS MCSS & PubChem', 'MoSS MCSS & ToxPrints', 'ToxPrints & PubChem', 'MoSS MCSS & ToxPrints & PubChem']
colors = "rbgymck"
line_styles = ['o-', 'o-', 'o-', 'v-', 'v-', 'v-', 's-']
# Number of leading thresholds drawn per series (None = all of them). The
# three-descriptor overlap is only drawn for the first 8 thresholds.
point_limits = [None, None, None, None, None, None, 8]

for position, (label, color, line_style, limit) in enumerate(zip(labels, colors, line_styles, point_limits)):
    # Rows of dict_HPneighbors alternate [total, percent]; the percentage of
    # the k-th series therefore sits in column 2*k + 1.
    column = 2*position + 1
    # A real list rather than map(): on Python 3 map() returns a lazy
    # iterator, which matplotlib cannot plot.
    concordance = [float(dict_HPneighbors[t][column]) for t in thresholds]
    plt.plot(thresholds[:limit], concordance[:limit], line_style, color = color, label = label)

plt.title("Hindered Analogs", fontsize=32, y=1.05)


plt.axis([0.1,0.9,40,100])
plt.yticks(range(40,105,5))

plt.xlabel("Similarity Threshold",fontsize=30)
plt.ylabel("Concordance (%)",fontsize=30)

ax.xaxis.labelpad = 15
ax.yaxis.labelpad = 15

fontP = FontProperties()
fontP.set_size('14')
plt.legend(loc = "upper left", prop = fontP)

plt.tick_params(axis='both', which='major', labelsize=16)
plt.savefig(path+'ReadAcross-Project/Cut-Off/RA-Concordance-HPs.png',bbox_inches='tight')

#%%
# NHP Plot
fig_nhp = plt.figure(figsize=(12,8), dpi = 100)
ax = fig_nhp.add_subplot(111)

labels = ['MoSS MCSS', 'ToxPrints', 'Pubchem', 'MoSS MCSS & PubChem', 'MoSS MCSS & ToxPrints', 'ToxPrints & PubChem', 'MoSS MCSS & ToxPrints & PubChem']
colors = "rbgymck"
line_styles = ['o-', 'o-', 'o-', 'v-', 'v-', 'v-', 's-']
# Number of leading thresholds drawn per series (None = all of them). The
# MoSS & ToxPrints overlap and the three-descriptor overlap are only drawn
# for the first 8 thresholds.
point_limits = [None, None, None, None, 8, None, 8]

for position, (label, color, line_style, limit) in enumerate(zip(labels, colors, line_styles, point_limits)):
    # Rows of dict_NHPneighbors alternate [total, percent]; the percentage of
    # the k-th series therefore sits in column 2*k + 1.
    column = 2*position + 1
    # A real list rather than map(): on Python 3 map() returns a lazy
    # iterator, which matplotlib cannot plot.
    concordance = [float(dict_NHPneighbors[t][column]) for t in thresholds]
    plt.plot(thresholds[:limit], concordance[:limit], line_style, color = color, label = label)

plt.title("Non-hindered Analogs", fontsize=32, y=1.05)

fontP = FontProperties()
fontP.set_size('14')
plt.axis([0.1,0.9,40,100])
plt.yticks(range(40,105,5))

plt.xlabel("Similarity Threshold",fontsize=30)
plt.ylabel("Concordance (%)",fontsize=30)

ax.xaxis.labelpad = 15
ax.yaxis.labelpad = 15

plt.legend(loc = "upper left", prop = fontP)

plt.tick_params(axis='both', which='major', labelsize=16)
plt.savefig(path+'ReadAcross-Project/Cut-Off/RA-Concordance-NHPs.png',bbox_inches='tight')


#%%
## Total Concordance all Phenols
################################################ 
## ********** Concordance analysis *************
## ********** 1. By Tanimoto Cut-off ***********
################################################


def _percent_agreement(agree, total):
    """Return ``agree`` as a percentage of ``total``; 0.0 when ``total`` is 0."""
    if total == 0:
        return 0.0
    return 100*float(agree)/float(total)


def _analogs_above(neighbor_table, phenol, cutoff):
    """Return the neighbor ids of ``phenol`` whose stored value is >= ``cutoff``.

    ``neighbor_table`` maps a phenol id to a list of ``[neighbor_id, value]``
    pairs. A phenol without an entry has no analogs. Ids are returned in the
    stored order and repeated if they are stored more than once.
    """
    selected = []
    for neighbor_id, value in neighbor_table.get(phenol, []):
        try:
            value = float(value)
        except ValueError:
            # A blank or non-numeric cell ends the scan for this phenol. This
            # is what the previous blanket "except: pass" around the whole
            # loop did, so the counts stay the same.
            break
        if value >= cutoff:
            selected.append(neighbor_id)
    return selected


dict_neighbors = {}

coverage_all = {} ## Coverage: number of phenols predicted for each threshold
thresholds = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]

# One letter per descriptor: m = MoSS MCSS, c = Chemotyper, p = Pubchem.
descriptor_tables = [('m', neighbors_m), ('c', neighbors_c), ('p', neighbors_p)]
# Column order of the result rows: the three single descriptors followed by
# the overlaps (analogs selected by every descriptor named in the key).
series = ['m', 'c', 'p', 'mp', 'mc', 'cp', 'mcp']

for threshold in thresholds:
    totals = dict.fromkeys(series, 0)      # analogs compared
    agreements = dict.fromkeys(series, 0)  # analogs with the same class
    covered = dict.fromkeys(series, 0)     # phenols with >= 1 analog

    for hp in all_hp:
        hp_class = hp_data[hp][1]

        # found[descriptor] -> ids of the analogs (hindered or not) which the
        # descriptor selected for this phenol at this threshold.
        found = {}
        for name, table in descriptor_tables:
            picked = _analogs_above(table, hp, threshold)
            found[name] = set(picked)
            totals[name] += len(picked)
            agreements[name] += sum(1 for n in picked if p_data[n][1] == hp_class)
            if picked:
                covered[name] += 1

        # Overlaps: each analog shared by all descriptors in the key counts once.
        for name in series[3:]:
            shared = set.intersection(*[found[d] for d in name])
            totals[name] += len(shared)
            agreements[name] += sum(1 for n in shared if p_data[n][1] == hp_class)

    # The row alternates [total, percent agreement] in the order of `series`.
    # Every percentage is guarded, so a threshold that leaves a descriptor
    # without any analog yields 0 instead of a ZeroDivisionError.
    row = []
    for name in series:
        row.append(totals[name])
        row.append(_percent_agreement(agreements[name], totals[name]))
    dict_neighbors[threshold] = row

    coverage_all[threshold] = [covered['p'], covered['c'], covered['m']]
    

#%%
fig = plt.figure(figsize=(12,8), dpi = 100)
ax = fig.add_subplot(111)
labels = ['MoSS MCSS', 'ToxPrints', 'Pubchem', 'MoSS MCSS & PubChem', 'MoSS MCSS & ToxPrints', 'ToxPrints & PubChem', 'MoSS MCSS & ToxPrints & PubChem']
colors = "rbgymck"
line_styles = ['o-', 'o-', 'o-', 'v-', 'v-', 'v-', 's-']
# Number of leading thresholds drawn per series (None = all of them). The
# three-descriptor overlap is only drawn for the first 8 thresholds.
point_limits = [None, None, None, None, None, None, 8]

for position, (label, color, line_style, limit) in enumerate(zip(labels, colors, line_styles, point_limits)):
    # Rows of dict_neighbors alternate [total, percent]; the percentage of
    # the k-th series therefore sits in column 2*k + 1.
    column = 2*position + 1
    # A real list rather than map(): on Python 3 map() returns a lazy
    # iterator, which matplotlib cannot plot.
    concordance = [float(dict_neighbors[t][column]) for t in thresholds]
    plt.plot(thresholds[:limit], concordance[:limit], line_style, color = color, label = label)

plt.title("Both Hindered & Non-hindered Analogs", fontsize=32, y=1.05)

fontP = FontProperties()
fontP.set_size('14')
plt.axis([0.1,0.9,40,100])
plt.yticks(range(40,105,5))

plt.xlabel("Similarity Threshold",fontsize=30)
plt.ylabel("Concordance (%)",fontsize=30)

ax.xaxis.labelpad = 15
ax.yaxis.labelpad = 15

plt.legend(loc = "upper left", prop = fontP)

plt.tick_params(axis='both', which='major', labelsize=16)
plt.savefig(path+'ReadAcross-Project/Cut-Off/RA-Concordance-All.png',bbox_inches='tight')

#%%
# Concordance of HP, NHP and both as analogs at similarity threshold 0.9
    
fig = plt.figure(figsize=(12,8), dpi = 100)
ax = fig.add_subplot(111)

# All three result tables are read at this threshold. Naming it explicitly
# avoids depending on whatever value an earlier loop left in `threshold`.
cutoff = 0.9

# One entry per descriptor, in drawing order:
#   (legend label, bar colour, x of the middle bar,
#    percent column in the result rows, position in the coverage lists)
# The coverage lists are ordered p, c, m; in `coverage` the three NHP values
# follow the three HP values, hence the "+ 3" below.
bar_groups = [
    ('MoSS MCSS', 'red', 10, 1, 2),
    ('PubChem', 'green', 2, 5, 0),
    ('ToxPrints', 'mediumblue', 6, 3, 1),
]

legend_handles = {}
for group_label, colour, centre, column, cov_index in bar_groups:
    # (concordance in %, number of phenols covered) for HP analogs, NHP
    # analogs and both together; drawn left to right around `centre`.
    bars = [
        (dict_HPneighbors[cutoff][column], coverage[cutoff][cov_index]),
        (dict_NHPneighbors[cutoff][column], coverage[cutoff][cov_index + 3]),
        (dict_neighbors[cutoff][column], coverage_all[cutoff][cov_index]),
    ]
    for offset, (concordance, n_covered) in zip((-1, 0, 1), bars):
        rects = ax.bar(centre + offset, concordance, 1, color=colour, align='center')
        # The number printed above each bar is the coverage count.
        autolabel(rects, n_covered, 0)
        if offset == -1:
            # The first bar of each group represents it in the legend.
            legend_handles[group_label] = rects

# Each handle is looked up by its own label, so the red bars are listed as
# MoSS MCSS and the blue bars as ToxPrints (they used to be swapped).
legend_order = ('PubChem', 'ToxPrints', 'MoSS MCSS')
ax.legend([legend_handles[name] for name in legend_order], legend_order, fontsize = 20, loc='upper right')

x_ticks = ['HP', 'NHP', 'Both', '', 'HP', 'NHP', 'Both', '', 'HP', 'NHP', 'Both']
plt.xticks(range(1,12,1), x_ticks)
plt.yticks(range(0,105,5))

plt.xlabel("Analogs",fontsize=30)
plt.ylabel("Concordance (%)",fontsize=30)

ax.xaxis.labelpad = 15
ax.yaxis.labelpad = 15

plt.tick_params(axis='both', which='major', labelsize=16)
plt.savefig(path+'ReadAcross-Project/Cut-Off/RA-Concordance-90.png',bbox_inches='tight')
